{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "is_executing": false
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from datetime import timedelta, date"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [],
   "source": [
    "STOCKS_FILE = 'magazine_stocks_clean.csv'\n",
    "DOLLAR_PRICE_FILE = 'dollar_prices_clean.csv'\n",
    "TWEETS_FILE = 'magazine_tweets_clean.csv'"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n",
     "is_executing": false
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "data_stocks = pd.read_csv(STOCKS_FILE, sep=',')\n",
    "data_dollar = pd.read_csv(DOLLAR_PRICE_FILE, sep=',')\n",
    "tweets = pd.read_csv(TWEETS_FILE, sep=',')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "text": [
      "c:\\programdata\\anaconda3\\envs\\stock-market\\lib\\site-packages\\pandas\\core\\frame.py:4102: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  errors=errors,\n"
     ],
     "output_type": "stream"
    }
   ],
   "source": [
    "merged_data = data_stocks[data_stocks['date'] < \"2019-10-19 00:00:00\"]\n",
    "merged_data.drop(merged_data.columns[[0]], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "text": [
      "c:\\programdata\\anaconda3\\envs\\stock-market\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  \"\"\"Entry point for launching an IPython kernel.\nc:\\programdata\\anaconda3\\envs\\stock-market\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  \nc:\\programdata\\anaconda3\\envs\\stock-market\\lib\\site-packages\\ipykernel_launcher.py:3: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  This is separate from the ipykernel package so we can avoid doing imports until\nc:\\programdata\\anaconda3\\envs\\stock-market\\lib\\site-packages\\ipykernel_launcher.py:4: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  after removing the cwd from sys.path.\nc:\\programdata\\anaconda3\\envs\\stock-market\\lib\\site-packages\\ipykernel_launcher.py:5: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  \"\"\"\n"
     ],
     "output_type": "stream"
    }
   ],
   "source": [
    "merged_data['dollar_open'] = np.nan\n",
    "merged_data['dollar_high'] = np.nan\n",
    "merged_data['dollar_low'] = np.nan\n",
    "merged_data['dollar_close'] = np.nan\n",
    "merged_data.sort_values('date', inplace=True)\n",
    "\n",
    "for i in merged_data.index:\n",
    "    current_date = merged_data.at[i, 'date']\n",
    "    current_day, current_time = current_date.split(' ')\n",
    "    hour, min_sec = current_time.split(':', 1)\n",
    "\n",
    "    if current_day == \"2019-09-23\":\n",
    "        dollar_minuto_inform = data_dollar.loc[data_dollar['date'] == \"2019-09-20 21:00:00\"]\n",
    "    else:\n",
    "        dollar_minuto_inform = data_dollar.loc[data_dollar['date'] == current_day + ' ' + str(int(hour) + 4) + ':' + min_sec]\n",
    "\n",
    "    merged_data.at[i, 'dollar_open'] = dollar_minuto_inform.iloc[0]['open']\n",
    "    merged_data.at[i, 'dollar_high'] = dollar_minuto_inform.iloc[0]['high']\n",
    "    merged_data.at[i, 'dollar_low'] = dollar_minuto_inform.iloc[0]['low']\n",
    "    merged_data.at[i, 'dollar_close'] = dollar_minuto_inform.iloc[0]['close']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "text": [
      "c:\\programdata\\anaconda3\\envs\\stock-market\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  \"\"\"Entry point for launching an IPython kernel.\nc:\\programdata\\anaconda3\\envs\\stock-market\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  \n"
     ],
     "output_type": "stream"
    },
    {
     "data": {
      "text/plain": "   id                 date     open     high      low    close  volume  \\\n0   1  2019-09-10 09:07:00  33.0013  33.0316  32.7213  32.8513       0   \n1   2  2019-09-10 09:08:00  32.8810  33.1225  32.8522  33.0411  301337   \n2   3  2019-09-10 09:09:00  33.0200  33.3100  33.0200  33.1300  177100   \n3   4  2019-09-10 09:10:00  33.1506  33.1761  32.9759  33.0006  179428   \n4   5  2019-09-10 09:11:00  33.0012  33.1010  32.9008  32.9112  204443   \n\n   dollar_open  dollar_high  dollar_low  dollar_close  sentiment_polarity  \\\n0       4.1004       4.1015      4.0974        4.1007                 0.0   \n1       4.1014       4.1020      4.0971        4.0994                 0.0   \n2       4.0987       4.0995      4.0961        4.0979                 0.0   \n3       4.0972       4.0990      4.0956        4.0971                 0.0   \n4       4.0981       4.0990      4.0956        4.0974                 0.0   \n\n   sentiment_subjectivity  \n0                     0.0  \n1                     0.0  \n2                     0.0  \n3                     0.0  \n4                     0.0  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>id</th>\n      <th>date</th>\n      <th>open</th>\n      <th>high</th>\n      <th>low</th>\n      <th>close</th>\n      <th>volume</th>\n      <th>dollar_open</th>\n      <th>dollar_high</th>\n      <th>dollar_low</th>\n      <th>dollar_close</th>\n      <th>sentiment_polarity</th>\n      <th>sentiment_subjectivity</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>0</td>\n      <td>1</td>\n      <td>2019-09-10 09:07:00</td>\n      <td>33.0013</td>\n      <td>33.0316</td>\n      <td>32.7213</td>\n      <td>32.8513</td>\n      <td>0</td>\n      <td>4.1004</td>\n      <td>4.1015</td>\n      <td>4.0974</td>\n      <td>4.1007</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>1</td>\n      <td>2</td>\n      <td>2019-09-10 09:08:00</td>\n      <td>32.8810</td>\n      <td>33.1225</td>\n      <td>32.8522</td>\n      <td>33.0411</td>\n      <td>301337</td>\n      <td>4.1014</td>\n      <td>4.1020</td>\n      <td>4.0971</td>\n      <td>4.0994</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>2</td>\n      <td>3</td>\n      <td>2019-09-10 09:09:00</td>\n      <td>33.0200</td>\n      <td>33.3100</td>\n      <td>33.0200</td>\n      <td>33.1300</td>\n      <td>177100</td>\n      <td>4.0987</td>\n      <td>4.0995</td>\n      <td>4.0961</td>\n      <td>4.0979</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>3</td>\n      <td>4</td>\n      <td>2019-09-10 09:10:00</td>\n      <td>33.1506</td>\n      <td>33.1761</td>\n      <td>32.9759</td>\n      <td>33.0006</td>\n      <td>179428</td>\n      <td>4.0972</td>\n      <td>4.0990</td>\n      <td>4.0956</td>\n      <td>4.0971</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <td>4</td>\n      <td>5</td>\n      <td>2019-09-10 09:11:00</td>\n      <td>33.0012</td>\n      <td>33.1010</td>\n      <td>32.9008</td>\n      <td>32.9112</td>\n      <td>204443</td>\n      <td>4.0981</td>\n      <td>4.0990</td>\n      <td>4.0956</td>\n      <td>4.0974</td>\n      <td>0.0</td>\n      <td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "output_type": "execute_result",
     "execution_count": 6
    }
   ],
   "source": [
    "merged_data['sentiment_polarity'] = 0.0\n",
    "merged_data['sentiment_subjectivity'] = 0.0\n",
    "merged_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "text": [
      "c:\\programdata\\anaconda3\\envs\\stock-market\\lib\\site-packages\\pandas\\core\\indexing.py:494: SettingWithCopyWarning: \nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value instead\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n  self.obj[item] = s\n"
     ],
     "output_type": "stream"
    }
   ],
   "source": [
    "def daterange(date1, date2):\n",
    "    for n in range(int ((date2 - date1).days)+1):\n",
    "        yield date1 + timedelta(n)\n",
    "\n",
    "start_dt = date(2019,9,9)\n",
    "end_dt = date(2019,10,18)\n",
    "\n",
    "weekdays = [6,7]\n",
    "valid_dates = []\n",
    "for dt in daterange(start_dt, end_dt):\n",
    "    if dt.isoweekday() not in weekdays:\n",
    "        valid_dates.append(dt.strftime(\"%Y-%m-%d\"))\n",
    "\n",
    "for i in range(1, len(valid_dates)):\n",
    "    tweets_of_day = tweets.loc[(tweets['date'] > \"{:s} 23:59:59\".format(valid_dates[i - 1])) &\n",
    "                               (tweets['date'] <= \"{:s} 23:59:59\".format(valid_dates[i]))]\n",
    "    \n",
    "    merged_data.loc[(merged_data['date'] >= \"{:s} 23:59:59\".format(valid_dates[i - 1])) & (merged_data['date'] <= \"{:s} 23:59:59\".format(valid_dates[i])), 'sentiment_polarity'] = tweets_of_day['sentiment_polarity'].mean()\n",
    "    merged_data.loc[(merged_data['date'] >= \"{:s} 23:59:59\".format(valid_dates[i - 1])) & (merged_data['date'] <= \"{:s} 23:59:59\".format(valid_dates[i])), 'sentiment_subjectivity'] = tweets_of_day['sentiment_subjectivity'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "merged_data.to_csv(STOCKS_FILE.split('_', 1)[0] + \"_dataset.csv\", sep=',', encoding='utf-8')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  },
  "pycharm": {
   "stem_cell": {
    "cell_type": "raw",
    "source": [],
    "metadata": {
     "collapsed": false
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}